/*=======================================================================
Creator: Jingyuan Wang, jingyuanwang@u.northwestern.edu
Date created: 				08/17/2018
Date last modified: 		12/25/2018
Purpose: 

==========================================================================*/


*************************************************************************
* 				PART A. import QCEW data and make it ready to merge		*
*************************************************************************

use "$QCEW/QCEW_state_ind_year.dta", clear

* generate state string code
preserve
	import delimited using "$QCEWraw/codebook/state_titles.csv", varn(1) clear
	tempfile state_titles
	save `state_titles.dta', replace
restore

merge m:1 state_code using `state_titles.dta'
keep if _merge == 3
drop _merge

tempfile QCEW
save `QCEW.dta', replace

*************************************************************************
* 				PART B. import SEDS data and make it ready to merge		*
*************************************************************************

use "$SEDS/industrialpriceexpenditure_state_year_fuel.dta", clear

* 1. keep only 8 selected energy type
* to match with MECS energy expenditure data, we only keep these 8 types
keep if energy_type == "ES" | energy_type == "RF" | energy_type == "DF" ///
		| energy_type == "NG" | energy_type == "HL" | energy_type == "CL" ///
		| energy_type == "PC" | energy_type == "PE"
 
* 2. reshape to state-year: each obs is a state-year
* (1) make energy type a int instead of string
sort energy_type
egen energy_code = group(energy_type)

preserve
	keep energy_code energy_type
	duplicates drop
	save "$SEDSraw/codebook/energy_type.dta", replace
restore
drop energy_type

* (2) reshape
rename price price_
rename expenditure expenditure_
reshape wide price_ expenditure_, i(state year) j(energy_code)

* (3) label
forvalues i = 1(1)8 {
	label var price_`i' "price in dollars per million Btu"
	label var expenditure_`i' "expenditure in million dollars"
}
*
* (4) rename 
rename price_1 price_CL
rename expenditure_1 expenditure_CL
rename price_2 price_DF
rename expenditure_2 expenditure_DF
rename price_3 price_elec
rename expenditure_3 expenditure_elec
rename price_4 price_HL
rename expenditure_4 expenditure_HL
rename price_5 price_NG
rename expenditure_5 expenditure_NG
rename price_6 price_PC
rename expenditure_6 expenditure_PC
rename price_7 price_PE
rename expenditure_7 expenditure_PE
rename price_8 price_RF
rename expenditure_8 expenditure_RF

* 3. save
tempfile SEDS
save `SEDS.dta', replace


*************************************************************************
* 				PART C. import MECS data and make it ready to merge		*
*************************************************************************
use "$MECSenergy/MECS.dta", clear

* to keep at NAICS6 level as much as possible 
keep if level <= 3 | level == 6
rename level level_mecs

* drop 1 obs in 2014 (other sector)
drop if naics == 98

* rename variables
drop order naicsid_str
rename naics naics_mecs
rename industry industry_name 

* save the file
tempfile MECS_naics3_6
save `MECS_naics3_6.dta', replace

* save a list of available NAICS6 industries
keep naics_mecs level_mecs year
preserve
	keep if level_mecs == 6 & year < 2012
	drop level_mecs year
	duplicates drop
	tempfile mecscode_pre2012
	save `mecscode_pre2012.dta', replace
restore
preserve
	keep if level_mecs == 6 & year >= 2012
	drop level_mecs year
	duplicates drop
	tempfile mecscode_post2012
	save `mecscode_post2012.dta', replace
restore

*************************************************************************
* 				PART D.  merge											*
*************************************************************************

* 1. merge QCEW and SEDS
use `QCEW.dta', clear
merge m:1 state year using `SEDS.dta'
keep if year >= 1990 & year <= 2016
keep if state != "PR" 
keep if state != "VI" 
tab _merge
drop _merge 

* 2. merge MECS
replace industry_code = "99" if industry_code == "31-33"

* (1) generate merging key : naics6 if possible
* generate naics3 
gen naics_3digit = substr(industry_code,1,3)
destring naics_3digit, replace
tab naics_3digit if industry_level == 2

* check which naics6 is available
gen naics_mecs = NAICS_code
merge m:1 naics_mecs using `mecscode_pre2012.dta'
rename _merge _merge_pre2012
merge m:1 naics_mecs using `mecscode_post2012.dta'
rename _merge _merge_post2012


* generate the merge key with MECS data, a combination of naics3 and naicd6
replace naics_mecs = naics_3digit if _merge_pre2012 == 1 & year <= 2010
replace naics_mecs = naics_3digit if _merge_post2012 == 1 & year >= 2011
gen fuelshare_ISnaics6level = 0
replace fuelshare_ISnaics6level = 1 if (_merge_pre2012 == 1 & year <= 2010) | (_merge_post2012 == 1 & year >= 2011)
drop _merge*

* (2) merge
merge m:1 naics_mecs year using `MECS_naics3_6.dta'
drop _merge


* save
order naics_3digit naics_mecs fuelshare_ISnaics6level industry_name level_mecs, a( year )
order state state_name, a(state_code)
save "$energy_merged/QCEW_SEDS_MECS_state_ind_year.dta", replace



*************************************************************************
*************************************************************************
*				2018 11 11 update: add Intensities 						*
*************************************************************************
*************************************************************************



*************************************************************************
* 				PART E. merge in eGRID state-level intensites			*
*************************************************************************

use "$energy_merged/QCEW_SEDS_MECS_state_ind_year.dta", clear

merge m:1 state_code state using "$EI/eGRID_2016.dta"
drop _merge
order price_PE expenditure_PE EI* ,a(expenditure_RF)
rename EI_* CI_elec_*
 
*************************************************************************
* 				PART F. type in EIA fuel carbon intensities				*
*************************************************************************
* base on the fuel intensities from EIA
* the file is at "$EIraw/EIA/co2_vol_mass_updated.xls"

gen CI_RF = .
gen CI_DF = 73.16
gen CI_NG = 54.70
gen CI_HL = 63.07
gen CI_CL = 95.35
gen CI_PC = 102.10
gen CI_PE = .
 
foreach v in "NG" "PC" {
	label var CI_`v' "[constant] industrial use intenties, Kilograms CO2 / Million Btu"
}
*
foreach v in "CL" "DF" "HL" {
	label var CI_`v' "[constant] home use intensities , Kilograms CO2 / Million Btu"
}
*

* save
order CI_*, a(expenditure_PE)
save "$energy_merged/QCEW_SEDS_MECS_CI_state_ind_year.dta", replace

